
pacman::p_load(fst,
               fs,
               lubridate,
               tidyverse
               )

# Check whether the fitted-output folder for one model exists on disk.
#
# Args:
#   model: model name spliced into the folder name
#     "fitted_<model>_<SUFFIX_FITTED_>".
#
# Returns:
#   The result of fs::dir_exists() for that folder.
#
# Reads the globals SPECIAL_SUFFIX and SUFFIX_FITTED_.
Determine_Data_Available <- function(model) {
  fitted_dir <- paste0(
    "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/",
    "fitted_", model, "_", SUFFIX_FITTED_, "/"
  )
  dir_exists(fitted_dir)
}

# Flag, for every candidate model, whether its fitted-output folder exists,
# then keep only the models that do.
v_existing_models <- map_lgl(V_POTENTIAL_MODELS, Determine_Data_Available)
v_models <- V_POTENTIAL_MODELS[v_existing_models]

# Calendar years spanned by the test window (both endpoints included).
start_year <- year(TEST_START_QTR)
end_year <- year(TEST_END_QTR)
v_years <- start_year:end_year

# Distinct (state, state_code) pairs from the FIPS lookup file.
df_fips <- distinct(read_csv("../../data/fips.csv"), state, state_code)

# Load one calendar year of fitted values for every model in `v_models` and
# combine them into a single wide table.
#
# For each model, the files in that model's fitted-output folder are listed,
# the first run of eight digits in each file path is parsed as the file's
# quarter date, and only files whose quarter falls in `year` are read and
# row-bound. The third column of the bound table is renamed to the model's
# name and rows with a missing `cid` are dropped. The per-model tables are
# then left-joined, in `v_models` order, on `cid` and `qtr`.
#
# Args:
#   year: calendar year to load; compared against the year of each file's
#     quarter date.
#
# Returns:
#   The left-joined table of all models' fitted values for that year.
#
# Raises:
#   An error naming the model and folder when a model has no files for the
#   requested year (previously this surfaced as an obscure rename failure on
#   an empty table).
#
# Reads the globals v_models, SPECIAL_SUFFIX and SUFFIX_FITTED_.
Load_Year_Fitted <- function(year) {

  # The argument shares its name with lubridate::year(); copy it under an
  # unambiguous name before using it inside data-masked dplyr verbs.
  target_year <- year

  # Read and tidy the fitted values of a single model for `target_year`.
  load_model_fitted <- function(model_) {

    path_model <- paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "fitted_", model_,
                         "_", SUFFIX_FITTED_, "/")

    file_info <- tibble(file_name = dir_ls(path_model)) %>%
      mutate(qtr = ymd(str_extract(file_name, "[0-9]{8}"))) %>%
      filter(lubridate::year(qtr) == target_year)

    if (nrow(file_info) == 0) {
      stop(
        "No fitted files found for model '", model_, "' in year ",
        target_year, " under ", path_model,
        call. = FALSE
      )
    }

    map_dfr(file_info$file_name, ~ read_fst(.)) %>%
      # The fitted value sits in the third column; name it after the model so
      # the models stay distinguishable once joined side by side.
      rename("{model_}" := 3) %>%
      filter(!is.na(cid))
  }

  # Build the per-model tables as a list (no assign()/get() on generated
  # names) and fold them together with successive left joins.
  v_models %>%
    map(load_model_fitted) %>%
    reduce(left_join, by = c("cid", "qtr"))

}

# Read the identifier and geography columns of the raw quarterly files that
# belong to one calendar year of the test window.
#
# A file is read when (a) the first run of eight digits in its path parses to
# a quarter inside the TEST_START_QTR..TEST_END_QTR sequence, (b) the four
# digits captured just before "fst" (the pattern's dot is unescaped, so any
# single character may sit between them) are in RAND_NO_CID, and (c) that
# quarter's year equals `data_year`.
#
# Args:
#   data_year: calendar year whose files should be loaded.
#
# Returns:
#   The row-bound contents of the selected files, restricted to the columns
#   cid, qtr, state, county_code, census_tract and census_block.
Load_Year_Raw <- function(data_year) {

  raw_columns <- c("cid", "qtr", "state", "county_code", "census_tract", "census_block")

  window_qtrs <- seq.Date(
    from = as.Date(TEST_START_QTR),
    to = as.Date(TEST_END_QTR),
    by = "quarter"
  )

  candidates <- mutate(
    tibble(file_name = dir_ls("../../data_qtr_rand_no_cid_with_outcome/")),
    qtr = ymd(str_extract(file_name, "[0-9]{8}")),
    rand_no_cid = str_extract(file_name, "[0-9]{4}(?=.fst)")
  )

  wanted <- filter(
    candidates,
    qtr %in% window_qtrs,
    rand_no_cid %in% RAND_NO_CID,
    year(qtr) == data_year
  )

  map_dfr(wanted$file_name, function(path) read_fst(path, columns = raw_columns))

}

# Load the CRA file for one year and swap its `State` column for the matching
# `state` value from the FIPS lookup.
#
# Args:
#   data_year: year used to build the file name "<data_year>.csv".
#
# Returns:
#   The CRA table left-joined to the global `df_fips` (CRA `State` matched to
#   `state_code`), with the original `State` column removed.
Load_CRA <- function(data_year) {

  cra_file <- paste0("../../data/CRA/final/", data_year, ".csv")
  cra <- read_csv(cra_file)

  cra_with_state <- left_join(cra, df_fips, by = c("State" = "state_code"))

  select(cra_with_state, -State)

}


# Build the merged table for one year: fitted values, joined to the raw
# geography columns on (cid, qtr), joined to the CRA table on state, county
# and census tract.
#
# Despite its name, this function writes nothing itself; it returns the
# merged table to the caller.
#
# Args:
#   year: calendar year passed to Load_CRA(), Load_Year_Fitted() and
#     Load_Year_Raw().
#
# Returns:
#   The fitted values for `year` with raw and CRA columns attached by left
#   joins.
Save_Merged_Race_Fitted_Values <- function(year) {

  cra <- Load_CRA(year)
  fitted <- Load_Year_Fitted(year)
  raw <- Load_Year_Raw(year)

  fitted_with_geo <- left_join(fitted, raw, by = c("cid", "qtr"))

  left_join(
    fitted_with_geo,
    cra,
    by = c("state", "county_code" = "County", "census_tract" = "Tract")
  )

}

# Merge every year in `v_years`, drop rows whose Income_Level is missing,
# keep the output columns, and write the result to test_fitted_CRA.fst.
map_dfr(v_years, Save_Merged_Race_Fitted_Values) %>%
  filter(!is.na(Income_Level)) %>%
  select(
    cid, qtr, Year,
    xgb, logistic, riskscore,
    t_default, is_thick, Income_Level
  ) %>%
  write_fst(
    path = paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "fitted_merged_", SUFFIX_FITTED_, "/", "test_fitted_CRA.fst")
  )






